"""
sigma_search
============

This module provides a simple implementation of the self‑tuning bandwidth
estimation used in the Diffusion‑Aligned Embeddings (DAE) algorithm.

In the original UMAP and DAE implementations the bandwidth (``sigma``) for
each point is chosen such that the sum of kernel values over its nearest
neighbours reaches a target probability mass.  In the absence of the
proprietary search routine, this simplified version estimates the bandwidth
for each point as the mean of its neighbour distances after subtracting the
local offset ``rho`` and clipping negative values to zero.  A minimum floor
is applied to avoid zero bandwidths.

This approximation preserves the basic behaviour of adapting the kernel
bandwidth to local density while avoiding the computational cost of a
numerical search.
"""
from __future__ import annotations

import numpy as np
from typing import Callable

def self_tuning_sigma(
    rho: np.ndarray,
    distances: np.ndarray,
    k: int,
    *,
    kernel_function: Callable[[float, np.ndarray], float] | None = None,
    kernel_params: np.ndarray | None = None,
) -> np.ndarray:
    """Estimate per-sample bandwidths from offset neighbour distances.

    For every sample the bandwidth is the mean of
    ``max(distances - rho, 0)`` over its neighbours, floored at ``1e-6``.

    Parameters
    ----------
    rho : array-like, shape (n_samples,)
        Local connectivity offsets (first neighbour distance for each sample).
    distances : array-like, shape (n_samples, k-1)
        Distances to the k-1 nearest neighbours (excluding self).  These
        distances should correspond to the same ordering used to compute
        ``rho``.
    k : int
        Number of neighbours considered (including the 0th neighbour used
        for ``rho``).
    kernel_function : callable, optional
        A kernel function accepting ``s`` and ``kernel_params``.  Not used
        in this simplified implementation; included for API compatibility.
    kernel_params : array-like, optional
        Additional parameters for ``kernel_function``.  Not used here.

    Returns
    -------
    sigma : ndarray of float32, shape (n_samples,)
        Estimated bandwidth for each sample.  Each value is at least 1e-6
        (up to float32 rounding).  When ``k == 1`` there are no neighbour
        distances to average, so every entry equals the floor.

    Raises
    ------
    ValueError
        If ``distances`` is not 2-D, does not have ``k - 1`` columns, or if
        ``rho`` is not a 1-D array with one entry per row of ``distances``.

    Notes
    -----
    This implementation does **not** perform the numerical search used in
    UMAP or the original DAE algorithm.  Instead, it computes a simple
    measure of local scale by taking the mean of the distances to a
    sample's nearest neighbours (after subtracting ``rho``).  Neighbours
    closer than ``rho`` contribute zero to that mean.  This is intended as
    a fallback when the full search routine is unavailable.
    """
    min_sigma = 1e-6

    # Accept any array-like (lists, tuples, ...) rather than ndarrays only.
    rho = np.asarray(rho)
    distances = np.asarray(distances)

    if distances.ndim != 2:
        raise ValueError("distances must be a 2D array of shape (n_samples, k-1)")
    if distances.shape[1] != k - 1:
        raise ValueError(f"distances must have k-1 columns, got {distances.shape[1]} for k={k}")

    n_samples = distances.shape[0]
    # Without this check a (1,) or (n, 1) rho would broadcast silently and
    # yield wrong values or a wrongly shaped result.
    if rho.shape != (n_samples,):
        raise ValueError(
            f"rho must have shape ({n_samples},) to match distances, got {rho.shape}"
        )

    # With no neighbour columns the mean is undefined (NaN); fall back to
    # the floor so callers always receive a usable bandwidth.
    if distances.shape[1] == 0:
        return np.full(n_samples, min_sigma, dtype=np.float32)

    # Compute positive local distances (subtract rho and clip at zero)
    diff = np.maximum(distances - rho[:, None], 0.0)
    # Mean local distance, floored so that tiny positive means are raised
    # to the minimum as well (not only exact zeros).
    sigma = np.maximum(diff.mean(axis=1), min_sigma)
    return sigma.astype(np.float32, copy=False)

# Names exported by ``from <module> import *``.
__all__ = [
    "self_tuning_sigma",
]